
## Code developed for Menzies et al. "Bayesian methods for calibrating
## health policy models: a tutorial" Pharmacoeconomics.

##############################################
################### MODEL ####################
##############################################
# This function estimates outcomes describing epidemiology of a hypothetical disease
# as well as outcomes (life-years, costs) for estimating cost-effectiveness of a policy 
# to expand treatment access to individual's with early stage disease.

 
  mod <- function(par_vector,project_future=FALSE) {
    # par_vector: a vector of model parameters
    # project_future: TRUE/FALSE, whether to project future outcomes for policy comparison
    pop_size   <- 1e6             # population size hard-coded as 1 million
    mu_b       <- 0.015           # background mortality rate hard-coded as 0.015
    mu_e       <- par_vector[1]   # cause-specific mortality rate with early-stage disease
    mu_l       <- par_vector[2]   # cause-specific mortality rate with late-stage disease
    mu_t       <- par_vector[3]   # cause-specific mortality rate on treatment
    p          <- par_vector[4]   # transtion rate from early to late-stage disease
    r_l <- r_e <- par_vector[5]   # rate of uptake onto treatment (r_l = late-stage disease;r_e = early-stage disease)
    rho        <- par_vector[6]   # effective contact rate 
    b          <- par_vector[7]   # fraction of population in at-risk group
    c          <- par_vector[8]   # annual cost of treatment  

    ######## Prepare to run model ###################
    n_yrs    <- if(project_future) { 51 } else { 30 }  # no. years to simulate (30 to present, 51 for 20 year analytic horizon)
    sim      <- if(project_future) { 1:2 } else { 1 }  # which scenarios to simulate: 1 = base case, 2 = expanded treatment access
    v_mu     <- c(0,0,mu_e,mu_l,mu_t)+mu_b             # vector of mortality rates
    births   <- pop_size*mu_b*c(1-b,b)                 # calculate birth rate for equilibrium population before epidemic
    init_pop <- pop_size*c(1-b,b-0.001,0.001,0,0,0)    # creates starting vector for population
    trace    <- matrix(NA,12*n_yrs,6)                  # creates a table to store simulation trace
    colnames(trace) <- c("N","S","E","L","T","D")
    results  <- list()                                 # creates a list to store results
  
    ######## Run model ###################
    for(s in sim) {
      P0 <- P1 <- init_pop 
      for(m in 1:(12*n_yrs)) {
        lambda    <- rho*sum(P0[3:4])/sum(P0[2:5]) # calculates force of infection
        P1[1:2]   <- P1[1:2]+births/12             # births
        P1[-6]    <- P1[-6]-P0[-6]*v_mu/12         # deaths: N, S, E, L, T, to D
        P1[6]     <- P1[6]+sum(P0[-6]*v_mu/12)     # deaths:N, S, E, L, T, to D
        P1[2]     <- P1[2]-P0[2]*lambda/12         # infection: S to E
        P1[3]     <- P1[3]+P0[2]*lambda/12         # infection: S to E
        P1[3]     <- P1[3]-P0[3]*p/12              # progression: E to L
        P1[4]     <- P1[4]+P0[3]*p/12              # progression: E to L
        P1[4]     <- P1[4]-P0[4]*r_l/12            # treatment uptake: L to T
        P1[5]     <- P1[5]+P0[4]*r_l/12            # treatment uptake: L to T
        if(s==2 & m>(12*30)) {
          P1[3]   <- P1[3]-P0[3]*r_e/12            # treatment uptake: E to T (scenario 2)
          P1[5]   <- P1[5]+P0[3]*r_e/12            # treatment uptake: E to T (scenario 2)
        }
        trace[m,] <- P0 <- P1                      # fill trace, reset pop vectors
      }
      results[[s]] <- trace                        # save results for each scenario
    }
  
    ######## Report results ###################
    if(project_future==FALSE) {
      ## Return calibration metrics, if project_future = FALSE
      return(list(prev = (rowSums(trace[,3:5])/rowSums(trace[,1:5]))[c(10,20,30)*12],  # Prevalence at 10,20,30 years
                  surv = 1/(v_mu[3]+p)+ p/(v_mu[3]+p)*(1/v_mu[4]),                     # HIV survival without treatment
                  tx   = trace[30*12,5]                                                # Treatment volume at 30 years
                  ) )
    } else {
    ## Policy projections for CE analysis, if project_future = TRUE
      return(list(trace0   = results[[1]],     # Trace without expanded treatment access
                  trace1   = results[[2]],     # Trace with expanded treatment access
                  inc_LY   = sum(results[[2]][(30*12+1):(51*12),-6]-results[[1]][(30*12+1):(51*12),-6])/12,  # incr. LY lived with expanded tx
                  inc_cost = sum(results[[2]][(30*12+1):(51*12),5]-results[[1]][(30*12+1):(51*12),5])*c/12   # incr. cost  with expanded tx                  
                  ) )  
    }
  }

# Test it 
  mod(rep(0.5,8),project_future=F)  # works
  mod(rep(0.5,8),project_future=T)  # works
  

###################################################
############### SAMPLE FROM PRIOR #################
###################################################
# This function returns a sample from the prior parameter distribution,
# with each column a different parameter and each row a different parameter set.  
# sample.prior.srs: this uses a simple random sample of the parameter space.
# sample.prior.lhs: this uses a latin-hypercube sample of the parameter space.
  
## Simple random sample
  sample.prior.srs <- function(n) {
    # n: the number of samples desired
    draws <- data.frame(mu_e  = rlnorm(n,log(0.05)-1/2*0.5^2,0.5),
                        mu_l  = rlnorm(n,log(0.25)-1/2*0.5^2,0.5),
                        mu_t  = rlnorm(n,log(0.025)-1/2*0.5^2,0.5),
                        p     = rlnorm(n,log(0.1)-1/2*0.5^2,0.5),
                        r_l   = rlnorm(n,log(0.5)-1/2*0.5^2,0.5),
                        rho   = rlnorm(n,log(0.5)-1/2*0.5^2,0.5),
                        b     = rbeta(n,2,8),
                        c     = rlnorm(n,log(1000)-1/2*0.2^2,0.2)
                        )
    return(as.matrix(draws))
  }

## Latin hypercube sample
#  install.packages("lhs")
require(lhs)

  sample.prior.lhs <- function(n) {
    # n: the number of samples desired
    draws0 <- randomLHS(n=n,k=8)
    draws  <- data.frame( mu_e  = qlnorm(draws0[,1],log(0.05)-1/2*0.5^2,0.5),
                          mu_l  = qlnorm(draws0[,2],log(0.25)-1/2*0.5^2,0.5),
                          mu_t  = qlnorm(draws0[,3],log(0.025)-1/2*0.5^2,0.5),
                          p     = qlnorm(draws0[,4],log(0.1)-1/2*0.5^2,0.5),
                          r_l   = qlnorm(draws0[,5],log(0.5)-1/2*0.5^2,0.5),
                          rho   = qlnorm(draws0[,6],log(0.5)-1/2*0.5^2,0.5),
                          b     = qbeta(draws0[,7],2,8),
                          c     = qlnorm(draws0[,8],log(1000)-1/2*0.2^2,0.2)
                          )
    return(as.matrix(draws))
  }

  sample.prior <- sample.prior.lhs  # use the lhs version as this is more efficient

# Test it 
  sample.prior(3)  # works.

  
###################################################
################### RESULTS UNCALIBRATED ##########
###################################################
# This section uses mod and sample.prior functions to calculate results for the analysis 
# via Monte Carlo simulation, without making use of the calibration data.
  
# Draw sample from prior
  set.seed(1234)                             # set random seed for reproducibility
  samp <- sample.prior(1e5)                  # 100,000 draws from prior
  
# Generate estimates for inc cost and inc LY via MC simulation (may take a while)
  IncCost <- IncLY <- rep(NA,nrow(samp))     # vectors to collect results
  for(i in 1:nrow(samp)) { # i=1
    tmp        <- mod(samp[i,],project_future=T)
    IncLY[i]   <- tmp$inc_LY
    IncCost[i] <- tmp$inc_cost
    if(i/100==round(i/100,0)) { 
      cat('\r',paste(i/nrow(samp)*100,"% done",sep="")) 
    } 
  }
  
  Uncalib <- list(samp=samp,IncCost=IncCost,IncLY=IncLY)
  # save(Uncalib,file="Uncalib_results.rData")
  # load("Uncalib_results.rData")
  
 ### Results for uncalibrated model
  IncCost <- Uncalib$IncCost
  IncLY   <- Uncalib$IncLY
  
# Distribution of inc costs and inc LY
  hist(IncLY,breaks=100,col="blue3",border=F)
  hist(IncCost,breaks=100,col="red3",border=F)
  
# Summary results
  mean(IncLY/1e3); quantile(IncLY/1e3,c(1,39)/40)                     # 213 (6, 775) thousands
  mean(IncCost/1e6); quantile(IncCost/1e6,c(1,39)/40)                 # 277 (-34, 1235) millions
  mean(IncCost)/mean(IncLY); quantile(IncCost/IncLY,c(1,39)/40)       # 1300 (dominant, 5720)
  

###################################################
################### LIKELIHOOD ####################
###################################################
# This function calculates the log-prior for a parameter set or matrix of parameter sets excluding c,
# the parameter for treatment cost. c is fixed at an arbitrary value (1) as it has no role in the calibration.
  
  l_likelihood <- function(par_vector) {
    # par_vector: a vector (or matrix) of model parameters
    if(is.null(dim(par_vector))) par_vector <- t(par_vector)
    llik <- rep(0,nrow(par_vector))
    for(j in 1:nrow(par_vector)) {
      jj <- tryCatch( {
      res_j <- mod(c(as.numeric(par_vector[j,]),1))      
      llik[j] <- llik[j]+sum(dbinom(c(25,75,50),500,res_j[["prev"]], log=TRUE)) # prevalence likelihood
      llik[j] <- llik[j]+dnorm(10,res_j[["surv"]],2/1.96, log=TRUE)             # survival likelihood
      llik[j] <- llik[j]+dnorm(75000,res_j[["tx"]],5000/1.96, log=TRUE)         # treatment volume likelihood
      }, error = function(e) NA)
      if(is.na(jj)) { llik[j] <- -Inf } 
    }
    return(llik)
  }
  
# Test it 
  l_likelihood(rbind(rep(0.5,7),rep(0.6,7))) # works


###################################################
##################### PRIOR #######################
###################################################
# This function calculates the log-likelihood for a parameter set or matrix of parameter sets excluding c,
# the parameter for treatment cost. c is fixed at an arbitrary value (1) as it has no role in the calibration.
  
    l_prior <- function(par_vector) {
    # par_vector: a vector (or matrix) of model parameters (omits c)
      if(is.null(dim(par_vector))) par_vector <- t(par_vector)
      lprior <- rep(0,nrow(par_vector))
      lprior <- lprior+dlnorm(par_vector[,1],log(0.05 )-1/2*0.5^2,0.5,log=TRUE)    # mu_e
      lprior <- lprior+dlnorm(par_vector[,2],log(0.25 )-1/2*0.5^2,0.5,log=TRUE)    # mu_l
      lprior <- lprior+dlnorm(par_vector[,3],log(0.025)-1/2*0.5^2,0.5,log=TRUE)    # mu_t
      lprior <- lprior+dlnorm(par_vector[,4],log(0.1  )-1/2*0.5^2,0.5,log=TRUE)    # p
      lprior <- lprior+dlnorm(par_vector[,5],log(0.5  )-1/2*0.5^2,0.5,log=TRUE)    # r_l
      lprior <- lprior+dlnorm(par_vector[,6],log(0.5  )-1/2*0.5^2,0.5,log=TRUE)    # rho
      lprior <- lprior+dbeta( par_vector[,7],2,8,log=TRUE)                         # b
      return(lprior)
    }

# Test it 
  l_prior(rbind(rep(0.5,7),rep(0.6,7))) # works
  

###################################################
################## MAP ESTIMATION #################
###################################################
# This section obtains a single best-fitting parameter set via maximum a posteriori estimation.
# To do so, an optimization algorithm is used to identify the parameter set that maximizes
# the sum of log-prior plus log-likelihood, which is equal to the log posterior (plus a constant which can be ignored).
# Different optimization routines are tried, BFGS works best for this example. 

# Function for log-posterior
  l_post <- function(par_vector) {
    return( l_prior(par_vector) + l_likelihood(par_vector) )
    }

# Optimize with various methods in optim tool-box
  optOut_nm   <- optim(rep(.5,7),l_post              ,control=list(fnscale=-1)); optOut_nm    # est max(l_post) = -43.5
  optOut_cg   <- optim(rep(.5,7),l_post,method="CG"  ,control=list(fnscale=-1)); optOut_cg    # est max(l_post) = -12.0
  optOut_bfgs <- optim(rep(.5,7),l_post,method="BFGS",control=list(fnscale=-1)); optOut_bfgs  # est max(l_post) = -6.94

# Best fitting parameter set, incl. prior mode for c
  mode_c  <- exp(log(1000)-1/2*0.2^2-0.2^2)
  par_map <- c(optOut_bfgs$par,mode_c)
 
   
###################################################
#################### RESULTS MAP ##################
###################################################
# Results for model with best fitting parameter set identified with MAP
  
  res_map <- mod(par_map,project_future = T)
  res_map[["inc_cost"]]                       # inc cost
  res_map[["inc_LY"]]                         # inc cost
  res_map[["inc_cost"]]/res_map[["inc_LY"]]   # icer
  
  
###################################################
###################### SIR ########################
###################################################
# This section obtains a sample from the posterior parameter distribution via SIR.

# First, lets redefine sample.prior to only provide samples for the first seven parameters, excluding c.
  sample.prior <- function(n) {
    draws0 <- randomLHS(n=n,k=7)
    draws  <- data.frame( mu_e  = qlnorm(draws0[,1],log(0.05)-1/2*0.5^2,0.5),
                          mu_l  = qlnorm(draws0[,2],log(0.25)-1/2*0.5^2,0.5),
                          mu_t  = qlnorm(draws0[,3],log(0.025)-1/2*0.5^2,0.5),
                          p     = qlnorm(draws0[,4],log(0.1)-1/2*0.5^2,0.5),
                          r_l   = qlnorm(draws0[,5],log(0.5)-1/2*0.5^2,0.5),
                          rho   = qlnorm(draws0[,6],log(0.5)-1/2*0.5^2,0.5),
                          b     = qbeta(draws0[,7],2,8))
    return(as.matrix(draws))
  }
  
# Generate 100,000 samples from prior
  n_samp <- 1e5
  samp_i <- sample.prior(n_samp)

# Calculate likelihood for each parameter set in samp_i
  llik_i <- rep(NA,n_samp)
  for(i in 1:n_samp) { 
    llik_i[i] <- l_likelihood(mod(as.numeric(c(samp_i[i,],1))))
    if(i/100==round(i/100,0)) { 
      cat('\r',paste(i/nrow(samp_i)*100,"% done",sep="")) 
    } 
  }

# Calculate weights for resample (i.e. exponentiate the log-likelihood)
  wt <- exp(llik_i-max(llik_i))

# Resample from samp_i with wt as sampling weights
  id_samp  <- sample.int(n_samp,replace=T,prob=wt)
  post_sir <- samp_i[id_samp,]

# Unique parameter sets
  length(unique(id_samp)) # 797

# Effective sample size
  sum(table(id_samp))^2/sum(table(id_samp)^2) # 88.26

# Max weight
  max(table(id_samp))/sum(table(id_samp)) # 0.033
  
# Could use the parameter sets in post_sir to caliclate results, but lets 
# try a more efficient technique: IMIS.

###################################################
###################### IMIS #######################
###################################################
# This section obtains a sample from the posterior parameter distribution via IMIS
#  install.packages("IMIS")
require(IMIS)
  
# IMIS needs three functions to be defined: 
#   sample.prior -- draws samples, and we have already created this 
  
#   prior -- evaluates prior density of a parameter set or sets
  prior <- function(par_vector) { 
    exp(l_prior(par_vector))
    }
  
#   likelihood -- evaluates likelihood of a parameter set or sets

  likelihood <- function(par_vector) { 
    exp(l_likelihood(par_vector)) 
    }
  
# Run IMIS
  set.seed(1234)
  imis_res <- IMIS(B=100,B.re=1e4,number_k=400,D=1)  

# Draw samples for parameter c from prior
  set.seed(1234)
  c_i       <- rlnorm(1e4,log(1000)-1/2*0.2^2,0.2)
  post_imis <- cbind(imis_res$resample,c_i)
  
# Unique parameter sets
  length(unique(imis_res$resample[,1])) # 6372

# Effective sample size
  sum(table(imis_res$resample[,1]))^2/ sum(table(imis_res$resample[,1])^2) # 4713

# Max weight
  max(table(imis_res$resample[,1]))/sum(table(imis_res$resample[,1])) # 8e-04

  
###################################################
################## RESULTS: IMIS ##################
###################################################

#  Generate inc cost / inc LY using imis_post
  IncCostC <- IncLYC <- rep(NA,nrow(post_imis))
  for(i in 1:nrow(post_imis)) { # i=1
    tmp <- mod(post_imis[i,],project_future=T)
    IncLYC[i]   <- tmp$inc_LY
    IncCostC[i] <- tmp$inc_cost
    if(i/100==round(i/100,0)) { 
      cat('\r',paste(i/nrow(samp_i)*100,"% done",sep="")) 
    } 
  }

  Calib <- list(post_imis=post_imis,IncCostC=IncCostC,IncLYC=IncLYC)
  # save(Calib,file="Calib_results.rData")
  # load("Calib_results.rData")
  
# Distribution of inc costs and inc LY
  hist(IncLYC,breaks=100,col="blue3",border=F)
  hist(IncCostC,breaks=100,col="red3",border=F)
  
# Summary results
  mean(IncLYC/1e3); quantile(IncLYC/1e3,c(1,39)/40)                     # 130 (64, 228) thousands
  mean(IncCostC/1e6); quantile(IncCostC/1e6,c(1,39)/40)                 # 123 (-4, 312) millions
  mean(IncCostC)/mean(IncLYC); quantile(IncCostC/IncLYC,c(1,39)/40)     # 947 (dominant, 2010)

  
  
#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-# The end.  #-#-#-#-#-#-#-#-#-#-#-#-#-#-#
#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
  
